IBIN = blktapctrl tapdisk
QCOW_UTIL = img2qcow qcow2raw qcow-create
-INST_DIR = /usr/sbin
+INST_DIR = /usr/sbin
LIBAIO_DIR = ../../libaio/src
CFLAGS += -Werror
# Get gcc to generate the dependencies for us.
CFLAGS += -Wp,-MD,.$(@F).d
-DEPS = .*.d
+DEPS = .*.d
THREADLIB := -lpthread -lz
LIBS := -L. -L.. -L../lib
AIOLIBS := $(LIBAIO_DIR)/libaio.a
-BLK-OBJS := block-aio.o
-BLK-OBJS += block-sync.o
+BLK-OBJS := block-aio.o
+BLK-OBJS += block-sync.o
BLK-OBJS += block-vmdk.o
-BLK-OBJS += block-ram.o
+BLK-OBJS += block-ram.o
BLK-OBJS += block-qcow.o
BLK-OBJS += aes.o
qcow-util: img2qcow qcow2raw qcow-create
img2qcow qcow2raw qcow-create: %: $(BLK-OBJS)
- $(CC) $(CFLAGS) -o $* $(BLK-OBJS) $*.c $(AIOLIBS) $(LIBS)
+ $(CC) $(CFLAGS) -o $* $(BLK-OBJS) $*.c $(AIOLIBS) $(LIBS)
install: all
- $(INSTALL_PROG) $(IBIN) $(QCOW_UTIL) $(DESTDIR)$(INST_DIR)
+ $(INSTALL_PROG) $(IBIN) $(QCOW_UTIL) $(VHD_UTIL) $(DESTDIR)$(INST_DIR)
clean:
- rm -rf *.o *~ $(DEPS) xen TAGS $(IBIN) $(LIB) $(QCOW_UTIL)
+ rm -rf *.o *~ $(DEPS) xen TAGS $(IBIN) $(LIB) $(QCOW_UTIL) $(VHD_UTIL)
.PHONY: clean install
td_callback_t cb;
int id;
void *private;
+ uint64_t lsec;
};
struct tdaio_state {
return 0;
}
+static inline void init_fds(struct disk_driver *dd)
+{
+ int i;
+ struct tdaio_state *prv = (struct tdaio_state *)dd->private;
+
+ for(i = 0; i < MAX_IOFD; i++)
+ dd->io_fd[i] = 0;
+
+ dd->io_fd[0] = prv->poll_fd;
+}
+
/* Open the disk file and initialize aio state. */
-int tdaio_open (struct td_state *s, const char *name)
+int tdaio_open (struct disk_driver *dd, const char *name)
{
int i, fd, ret = 0;
- struct tdaio_state *prv = (struct tdaio_state *)s->private;
- s->private = prv;
+ struct td_state *s = dd->td_state;
+ struct tdaio_state *prv = (struct tdaio_state *)dd->private;
DPRINTF("block-aio open('%s')", name);
/* Initialize AIO */
prv->fd = fd;
+ init_fds(dd);
ret = get_image_info(s, fd);
+
done:
return ret;
}
-int tdaio_queue_read(struct td_state *s, uint64_t sector,
- int nb_sectors, char *buf, td_callback_t cb,
- int id, void *private)
+int tdaio_queue_read(struct disk_driver *dd, uint64_t sector,
+ int nb_sectors, char *buf, td_callback_t cb,
+ int id, void *private)
{
struct iocb *io;
struct pending_aio *pio;
- struct tdaio_state *prv = (struct tdaio_state *)s->private;
+ struct td_state *s = dd->td_state;
+ struct tdaio_state *prv = (struct tdaio_state *)dd->private;
int size = nb_sectors * s->sector_size;
uint64_t offset = sector * (uint64_t)s->sector_size;
long ioidx;
pio->cb = cb;
pio->id = id;
pio->private = private;
+ pio->lsec = sector;
io_prep_pread(io, prv->fd, buf, size, offset);
io->data = (void *)ioidx;
prv->iocb_queue[prv->iocb_queued++] = io;
-
+
return 0;
}
-int tdaio_queue_write(struct td_state *s, uint64_t sector,
- int nb_sectors, char *buf, td_callback_t cb,
- int id, void *private)
+int tdaio_queue_write(struct disk_driver *dd, uint64_t sector,
+ int nb_sectors, char *buf, td_callback_t cb,
+ int id, void *private)
{
struct iocb *io;
struct pending_aio *pio;
- struct tdaio_state *prv = (struct tdaio_state *)s->private;
+ struct td_state *s = dd->td_state;
+ struct tdaio_state *prv = (struct tdaio_state *)dd->private;
int size = nb_sectors * s->sector_size;
uint64_t offset = sector * (uint64_t)s->sector_size;
long ioidx;
pio->cb = cb;
pio->id = id;
pio->private = private;
+ pio->lsec = sector;
io_prep_pwrite(io, prv->fd, buf, size, offset);
io->data = (void *)ioidx;
prv->iocb_queue[prv->iocb_queued++] = io;
-
+
return 0;
}
-int tdaio_submit(struct td_state *s)
+int tdaio_submit(struct disk_driver *dd)
{
int ret;
- struct tdaio_state *prv = (struct tdaio_state *)s->private;
+ struct tdaio_state *prv = (struct tdaio_state *)dd->private;
ret = io_submit(prv->aio_ctx, prv->iocb_queued, prv->iocb_queue);
/* Success case: */
prv->iocb_queued = 0;
- return ret;
-}
-
-int *tdaio_get_fd(struct td_state *s)
-{
- struct tdaio_state *prv = (struct tdaio_state *)s->private;
- int *fds, i;
-
- fds = malloc(sizeof(int) * MAX_IOFD);
- /*initialise the FD array*/
- for(i=0;i<MAX_IOFD;i++) fds[i] = 0;
-
- fds[0] = prv->poll_fd;
-
- return fds;
+ return 0;
}
-int tdaio_close(struct td_state *s)
+int tdaio_close(struct disk_driver *dd)
{
- struct tdaio_state *prv = (struct tdaio_state *)s->private;
+ struct tdaio_state *prv = (struct tdaio_state *)dd->private;
io_destroy(prv->aio_ctx);
close(prv->fd);
-
+
return 0;
}
-int tdaio_do_callbacks(struct td_state *s, int sid)
+int tdaio_do_callbacks(struct disk_driver *dd, int sid)
{
int ret, i, rsp = 0;
struct io_event *ep;
- struct tdaio_state *prv = (struct tdaio_state *)s->private;
+ struct tdaio_state *prv = (struct tdaio_state *)dd->private;
/* Non-blocking test for completed io. */
ret = io_getevents(prv->aio_ctx, 0, MAX_AIO_REQS, prv->aio_events,
struct pending_aio *pio;
pio = &prv->pending_aio[(long)io->data];
- rsp += pio->cb(s, ep->res == io->u.c.nbytes ? 0 : 1,
+ rsp += pio->cb(dd, ep->res == io->u.c.nbytes ? 0 : 1,
+ pio->lsec, io->u.c.nbytes >> 9,
pio->id, pio->private);
prv->iocb_free[prv->iocb_free_count++] = io;
}
return rsp;
}
-
+
+int tdaio_has_parent(struct disk_driver *dd)
+{
+ return 0;
+}
+
+int tdaio_get_parent(struct disk_driver *dd, struct disk_driver *parent)
+{
+ return -EINVAL;
+}
+
struct tap_disk tapdisk_aio = {
- "tapdisk_aio",
- sizeof(struct tdaio_state),
- tdaio_open,
- tdaio_queue_read,
- tdaio_queue_write,
- tdaio_submit,
- tdaio_get_fd,
- tdaio_close,
- tdaio_do_callbacks,
+ .disk_type = "tapdisk_aio",
+ .private_data_size = sizeof(struct tdaio_state),
+ .td_open = tdaio_open,
+ .td_queue_read = tdaio_queue_read,
+ .td_queue_write = tdaio_queue_write,
+ .td_submit = tdaio_submit,
+ .td_has_parent = tdaio_has_parent,
+ .td_get_parent = tdaio_get_parent,
+ .td_close = tdaio_close,
+ .td_do_callbacks = tdaio_do_callbacks,
};
/******AIO DEFINES******/
#define REQUEST_ASYNC_FD 1
-#define MAX_QCOW_IDS 0xFFFF
#define MAX_AIO_REQS (MAX_REQUESTS * MAX_SEGMENTS_PER_REQ)
struct pending_aio {
int nb_sectors;
char *buf;
uint64_t sector;
- int qcow_idx;
};
#define IOCB_IDX(_s, _io) ((_io) - (_s)->iocb_list)
struct tdqcow_state {
int fd; /*Main Qcow file descriptor */
uint64_t fd_end; /*Store a local record of file length */
- int bfd; /*Backing file descriptor*/
char *name; /*Record of the filename*/
- int poll_pipe[2]; /*dummy fd for polling on */
+ uint32_t backing_file_size;
+ uint64_t backing_file_offset;
int encrypted; /*File contents are encrypted or plain*/
int cluster_bits; /*Determines length of cluster as
*indicated by file hdr*/
AES_KEY aes_decrypt_key; /*AES key*/
/* libaio state */
io_context_t aio_ctx;
- int nr_reqs [MAX_QCOW_IDS];
struct iocb iocb_list [MAX_AIO_REQS];
struct iocb *iocb_free [MAX_AIO_REQS];
struct pending_aio pending_aio[MAX_AIO_REQS];
static int decompress_cluster(struct tdqcow_state *s, uint64_t cluster_offset);
-static int init_aio_state(struct td_state *bs)
+static int init_aio_state(struct disk_driver *dd)
{
int i;
- struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
+ struct td_state *bs = dd->td_state;
+ struct tdqcow_state *s = (struct tdqcow_state *)dd->private;
long ioidx;
/*Initialize Locking bitmap*/
for (i=0;i<MAX_AIO_REQS;i++)
s->iocb_free[i] = &s->iocb_list[i];
- for (i=0;i<MAX_QCOW_IDS;i++)
- s->nr_reqs[i] = 0;
+
DPRINTF("AIO state initialised\n");
return 0;
if(!md) return 0;
- if (MD5((unsigned char *)ptr, len, md) != md) return 0;
+ if (MD5((unsigned char *)ptr, len, md) != md) {
+ free(md);
+ return 0;
+ }
memcpy(&ret, md, sizeof(uint32_t));
free(md);
static int get_filesize(char *filename, uint64_t *size, struct stat *st)
{
- int blockfd;
+ int fd;
+ QCowHeader header;
/*Set to the backing file size*/
+ fd = open(filename, O_RDONLY);
+ if (fd < 0)
+ return -1;
+ if (read(fd, &header, sizeof(header)) < sizeof(header)) {
+ close(fd);
+ return -1;
+ }
+ close(fd);
+
+ be32_to_cpus(&header.magic);
+ be64_to_cpus(&header.size);
+ if (header.magic == QCOW_MAGIC) {
+ *size = header.size >> SECTOR_SHIFT;
+ return 0;
+ }
+
if(S_ISBLK(st->st_mode)) {
- blockfd = open(filename, O_RDONLY);
- if (blockfd < 0)
+ fd = open(filename, O_RDONLY);
+ if (fd < 0)
return -1;
- if (ioctl(blockfd,BLKGETSIZE,size)!=0) {
+ if (ioctl(fd,BLKGETSIZE,size)!=0) {
printf("Unable to get Block device size\n");
- close(blockfd);
+ close(fd);
return -1;
}
- close(blockfd);
+ close(fd);
} else *size = (st->st_size >> SECTOR_SHIFT);
return 0;
}
-static int qcow_set_key(struct td_state *bs, const char *key)
+static int qcow_set_key(struct tdqcow_state *s, const char *key)
{
- struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
uint8_t keybuf[16];
int len, i;
return 0;
}
-static int async_read(struct tdqcow_state *s, int fd, int size,
- uint64_t offset,
- char *buf, td_callback_t cb,
- int id, uint64_t sector, int qcow_idx, void *private)
+static int async_read(struct tdqcow_state *s, int size,
+ uint64_t offset, char *buf, td_callback_t cb,
+ int id, uint64_t sector, void *private)
{
struct iocb *io;
struct pending_aio *pio;
pio->nb_sectors = size/512;
pio->buf = buf;
pio->sector = sector;
- pio->qcow_idx = qcow_idx;
- io_prep_pread(io, fd, buf, size, offset);
+ io_prep_pread(io, s->fd, buf, size, offset);
io->data = (void *)ioidx;
s->iocb_queue[s->iocb_queued++] = io;
return 1;
}
-static int async_write(struct tdqcow_state *s, int fd, int size,
- uint64_t offset,
- char *buf, td_callback_t cb,
- int id, uint64_t sector, int qcow_idx, void *private)
+static int async_write(struct tdqcow_state *s, int size,
+ uint64_t offset, char *buf, td_callback_t cb,
+ int id, uint64_t sector, void *private)
{
struct iocb *io;
struct pending_aio *pio;
pio->nb_sectors = size/512;
pio->buf = buf;
pio->sector = sector;
- pio->qcow_idx = qcow_idx;
- io_prep_pwrite(io, fd, buf, size, offset);
+ io_prep_pwrite(io, s->fd, buf, size, offset);
io->data = (void *)ioidx;
s->iocb_queue[s->iocb_queued++] = io;
return;
}
-/*TODO - Use a freelist*/
-static int get_free_idx(struct tdqcow_state *s)
-{
- int i;
-
- for(i = 0; i < MAX_QCOW_IDS; i++) {
- if(s->nr_reqs[i] == 0) return i;
- }
- return -1;
-}
-
/*
* The crypt function is compatible with the linux cryptoloop
* algorithm for < 4 GB images. NOTE: out_buf == in_buf is
{
int ret, i;
int current = 0, rem = 0;
- int sectors = (length + DEFAULT_SECTOR_SIZE - 1)/DEFAULT_SECTOR_SIZE;
+ uint64_t sectors;
struct stat st;
- char buf[DEFAULT_SECTOR_SIZE];
+ char *buf;
/* If length is greater than the current file len
* we synchronously write zeroes to the end of the
* file, otherwise we truncate the length down
*/
- memset(buf, 0x00, DEFAULT_SECTOR_SIZE);
ret = fstat(fd, &st);
- if (ret == -1)
+ if (ret == -1)
return -1;
if (S_ISBLK(st.st_mode))
return 0;
-
+
+ sectors = (length + DEFAULT_SECTOR_SIZE - 1)/DEFAULT_SECTOR_SIZE;
current = (st.st_size + DEFAULT_SECTOR_SIZE - 1)/DEFAULT_SECTOR_SIZE;
- rem = st.st_size % DEFAULT_SECTOR_SIZE;
+ rem = st.st_size % DEFAULT_SECTOR_SIZE;
/* If we are extending this file, we write zeros to the end --
* this tries to ensure that the extents allocated wind up being
*/
if(st.st_size < sectors * DEFAULT_SECTOR_SIZE) {
/*We are extending the file*/
+ if ((ret = posix_memalign((void **)&buf,
+ 512, DEFAULT_SECTOR_SIZE))) {
+ DPRINTF("posix_memalign failed: %d\n", ret);
+ return -1;
+ }
+ memset(buf, 0x00, DEFAULT_SECTOR_SIZE);
if (lseek(fd, 0, SEEK_END)==-1) {
- fprintf(stderr,
- "Lseek EOF failed (%d), internal error\n",
+ DPRINTF("Lseek EOF failed (%d), internal error\n",
errno);
+ free(buf);
return -1;
}
if (rem) {
ret = write(fd, buf, rem);
- if (ret != rem)
+ if (ret != rem) {
+ DPRINTF("write failed: ret = %d, err = %s\n",
+ ret, strerror(errno));
+ free(buf);
return -1;
+ }
}
for (i = current; i < sectors; i++ ) {
ret = write(fd, buf, DEFAULT_SECTOR_SIZE);
- if (ret != DEFAULT_SECTOR_SIZE)
+ if (ret != DEFAULT_SECTOR_SIZE) {
+ DPRINTF("write failed: ret = %d, err = %s\n",
+ ret, strerror(errno));
+ free(buf);
return -1;
+ }
}
-
+ free(buf);
} else if(sparse && (st.st_size > sectors * DEFAULT_SECTOR_SIZE))
- if (ftruncate(fd, sectors * DEFAULT_SECTOR_SIZE)==-1) {
- fprintf(stderr,
- "Ftruncate failed (%d), internal error\n",
- errno);
+ if (ftruncate(fd, (off_t)sectors * DEFAULT_SECTOR_SIZE)==-1) {
+ DPRINTF("Ftruncate failed (%s)\n", strerror(errno));
return -1;
}
return 0;
*
* return 0 if not allocated.
*/
-static uint64_t get_cluster_offset(struct td_state *bs,
+static uint64_t get_cluster_offset(struct tdqcow_state *s,
uint64_t offset, int allocate,
int compressed_size,
int n_start, int n_end)
{
- struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
int min_index, i, j, l1_index, l2_index, l2_sector, l1_sector;
char *tmp_ptr, *tmp_ptr2, *l2_ptr, *l1_ptr;
uint64_t l2_offset, *l2_table, cluster_offset, tmp;
* entry is written before blocks.
*/
lseek(s->fd, s->l1_table_offset + (l1_sector << 12), SEEK_SET);
- if (write(s->fd, tmp_ptr, 4096) != 4096)
+ if (write(s->fd, tmp_ptr, 4096) != 4096) {
+ free(tmp_ptr);
return 0;
+ }
free(tmp_ptr);
new_l2_table = 1;
return cluster_offset;
}
-static void init_cluster_cache(struct td_state *bs)
+static void init_cluster_cache(struct disk_driver *dd)
{
- struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
+ struct td_state *bs = dd->td_state;
+ struct tdqcow_state *s = (struct tdqcow_state *)dd->private;
uint32_t count = 0;
int i, cluster_entries;
cluster_entries, s->cluster_size);
for (i = 0; i < bs->size; i += cluster_entries) {
- if (get_cluster_offset(bs, i << 9, 0, 0, 0, 1)) count++;
+ if (get_cluster_offset(s, i << 9, 0, 0, 0, 1)) count++;
if (count >= L2_CACHE_SIZE) return;
}
DPRINTF("Finished cluster initialisation, added %d entries\n", count);
return;
}
-static int qcow_is_allocated(struct td_state *bs, int64_t sector_num,
+static int qcow_is_allocated(struct tdqcow_state *s, int64_t sector_num,
int nb_sectors, int *pnum)
{
- struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
-
int index_in_cluster, n;
uint64_t cluster_offset;
- cluster_offset = get_cluster_offset(bs, sector_num << 9, 0, 0, 0, 0);
+ cluster_offset = get_cluster_offset(s, sector_num << 9, 0, 0, 0, 0);
index_in_cluster = sector_num & (s->cluster_sectors - 1);
n = s->cluster_sectors - index_in_cluster;
if (n > nb_sectors)
return 0;
}
+static inline void init_fds(struct disk_driver *dd)
+{
+ int i;
+ struct tdqcow_state *s = (struct tdqcow_state *)dd->private;
+
+ for(i = 0; i < MAX_IOFD; i++)
+ dd->io_fd[i] = 0;
+
+ dd->io_fd[0] = s->poll_fd;
+}
+
/* Open the disk file and initialize qcow state. */
-int tdqcow_open (struct td_state *bs, const char *name)
+int tdqcow_open (struct disk_driver *dd, const char *name)
{
int fd, len, i, shift, ret, size, l1_table_size;
- struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
+ struct td_state *bs = dd->td_state;
+ struct tdqcow_state *s = (struct tdqcow_state *)dd->private;
char *buf;
QCowHeader *header;
QCowHeader_ext *exthdr;
uint64_t final_cluster = 0;
DPRINTF("QCOW: Opening %s\n",name);
- /* set up a pipe so that we can hand back a poll fd that won't fire.*/
- ret = pipe(s->poll_pipe);
- if (ret != 0)
- return (0 - errno);
fd = open(name, O_RDWR | O_DIRECT | O_LARGEFILE);
if (fd < 0) {
s->fd = fd;
asprintf(&s->name,"%s", name);
- ASSERT(sizeof(header) < 512);
+ ASSERT(sizeof(QCowHeader) + sizeof(QCowHeader_ext) < 512);
ret = posix_memalign((void **)&buf, 512, 512);
if (ret != 0) goto fail;
s->cluster_alloc = s->l2_size;
bs->size = header->size / 512;
s->cluster_offset_mask = (1LL << (63 - s->cluster_bits)) - 1;
-
+ s->backing_file_offset = header->backing_file_offset;
+ s->backing_file_size = header->backing_file_size;
+
/* read the level 1 table */
shift = s->cluster_bits + s->l2_bits;
s->l1_size = (header->size + (1LL << shift) - 1) >> shift;
if (read(fd, s->l1_table, l1_table_size) != l1_table_size)
goto fail;
- for(i = 0;i < s->l1_size; i++) {
+ for(i = 0; i < s->l1_size; i++) {
//be64_to_cpus(&s->l1_table[i]);
//DPRINTF("L1[%d] => %llu\n", i, s->l1_table[i]);
if (s->l1_table[i] > final_cluster)
if(ret != 0) goto fail;
s->cluster_cache_offset = -1;
- /* read the backing file name */
- s->bfd = -1;
- if (header->backing_file_offset != 0) {
- DPRINTF("Reading backing file data\n");
- len = header->backing_file_size;
- if (len > 1023)
- len = 1023;
-
- /*TODO - Fix read size for O_DIRECT and use original fd!*/
- fd = open(name, O_RDONLY | O_LARGEFILE);
-
- lseek(fd, header->backing_file_offset, SEEK_SET);
- if (read(fd, bs->backing_file, len) != len)
- goto fail;
- bs->backing_file[len] = '\0';
- close(fd);
- /***********************************/
-
- /*Open backing file*/
- fd = open(bs->backing_file, O_RDONLY | O_DIRECT | O_LARGEFILE);
- if (fd < 0) {
- DPRINTF("Unable to open backing file: %s\n",
- bs->backing_file);
- goto fail;
- }
- s->bfd = fd;
+ if (s->backing_file_offset != 0)
s->cluster_alloc = 1; /*Cannot use pre-alloc*/
- }
bs->sector_size = 512;
bs->info = 0;
/*Detect min_cluster_alloc*/
s->min_cluster_alloc = 1; /*Default*/
- if (s->bfd == -1 && (s->l1_table_offset % 4096 == 0) ) {
+ if (s->backing_file_offset == 0 && s->l1_table_offset % 4096 == 0) {
/*We test to see if the xen magic # exists*/
exthdr = (QCowHeader_ext *)(buf + sizeof(QCowHeader));
be32_to_cpus(&exthdr->xmagic);
}
end_xenhdr:
- if (init_aio_state(bs)!=0) {
+ if (init_aio_state(dd)!=0) {
DPRINTF("Unable to initialise AIO state\n");
goto fail;
}
+ init_fds(dd);
s->fd_end = (final_cluster == 0 ? (s->l1_table_offset + l1_table_size) :
(final_cluster + s->cluster_size));
return -1;
}
- int tdqcow_queue_read(struct td_state *bs, uint64_t sector,
- int nb_sectors, char *buf, td_callback_t cb,
- int id, void *private)
+int tdqcow_queue_read(struct disk_driver *dd, uint64_t sector,
+ int nb_sectors, char *buf, td_callback_t cb,
+ int id, void *private)
{
- struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
- int ret = 0, index_in_cluster, n, i, qcow_idx, asubmit = 0;
- uint64_t cluster_offset;
+ struct tdqcow_state *s = (struct tdqcow_state *)dd->private;
+ int ret = 0, index_in_cluster, n, i, rsp = 0;
+ uint64_t cluster_offset, sec, nr_secs;
+
+ sec = sector;
+ nr_secs = nb_sectors;
/*Check we can get a lock*/
- for (i = 0; i < nb_sectors; i++)
- if (!aio_can_lock(s, sector + i)) {
- DPRINTF("AIO_CAN_LOCK failed [%llu]\n",
- (long long) sector + i);
- return -EBUSY;
- }
-
+ for (i = 0; i < nb_sectors; i++)
+ if (!aio_can_lock(s, sector + i))
+ return cb(dd, -EBUSY, sector, nb_sectors, id, private);
+
/*We store a local record of the request*/
- qcow_idx = get_free_idx(s);
while (nb_sectors > 0) {
cluster_offset =
- get_cluster_offset(bs, sector << 9, 0, 0, 0, 0);
+ get_cluster_offset(s, sector << 9, 0, 0, 0, 0);
index_in_cluster = sector & (s->cluster_sectors - 1);
n = s->cluster_sectors - index_in_cluster;
if (n > nb_sectors)
n = nb_sectors;
- if (s->iocb_free_count == 0 || !aio_lock(s, sector)) {
- DPRINTF("AIO_LOCK or iocb_free_count (%d) failed"
- "[%llu]\n", s->iocb_free_count,
- (long long) sector);
- return -ENOMEM;
- }
+ if (s->iocb_free_count == 0 || !aio_lock(s, sector))
+ return cb(dd, -EBUSY, sector, nb_sectors, id, private);
- if (!cluster_offset && (s->bfd > 0)) {
- s->nr_reqs[qcow_idx]++;
- asubmit += async_read(s, s->bfd, n * 512, sector << 9,
- buf, cb, id, sector,
- qcow_idx, private);
- } else if(!cluster_offset) {
- memset(buf, 0, 512 * n);
+ if(!cluster_offset) {
aio_unlock(s, sector);
+ ret = cb(dd, BLK_NOT_ALLOCATED,
+ sector, n, id, private);
+ if (ret == -EBUSY) {
+ /* mark remainder of request
+ * as busy and try again later */
+ return cb(dd, -EBUSY, sector + n,
+ nb_sectors - n, id, private);
+ } else rsp += ret;
} else if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
+ aio_unlock(s, sector);
if (decompress_cluster(s, cluster_offset) < 0) {
- ret = -1;
+ rsp += cb(dd, -EIO, sector,
+ nb_sectors, id, private);
goto done;
}
memcpy(buf, s->cluster_cache + index_in_cluster * 512,
512 * n);
- } else {
- s->nr_reqs[qcow_idx]++;
- asubmit += async_read(s, s->fd, n * 512,
- (cluster_offset +
- index_in_cluster * 512),
- buf, cb, id, sector,
- qcow_idx, private);
+ rsp += cb(dd, 0, sector, n, id, private);
+ } else {
+ async_read(s, n * 512,
+ (cluster_offset + index_in_cluster * 512),
+ buf, cb, id, sector, private);
}
nb_sectors -= n;
sector += n;
buf += n * 512;
}
done:
- /*Callback if no async requests outstanding*/
- if (!asubmit) return cb(bs, ret == -1 ? -1 : 0, id, private);
-
- return 0;
+ return rsp;
}
- int tdqcow_queue_write(struct td_state *bs, uint64_t sector,
- int nb_sectors, char *buf, td_callback_t cb,
- int id, void *private)
+int tdqcow_queue_write(struct disk_driver *dd, uint64_t sector,
+ int nb_sectors, char *buf, td_callback_t cb,
+ int id, void *private)
{
- struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
- int ret = 0, index_in_cluster, n, i, qcow_idx, asubmit = 0;
- uint64_t cluster_offset;
+ struct tdqcow_state *s = (struct tdqcow_state *)dd->private;
+ int ret = 0, index_in_cluster, n, i;
+ uint64_t cluster_offset, sec, nr_secs;
+
+ sec = sector;
+ nr_secs = nb_sectors;
/*Check we can get a lock*/
for (i = 0; i < nb_sectors; i++)
- if (!aio_can_lock(s, sector + i)) {
- DPRINTF("AIO_CAN_LOCK failed [%llu]\n",
- (long long) (sector + i));
- return -EBUSY;
- }
+ if (!aio_can_lock(s, sector + i))
+ return cb(dd, -EBUSY, sector, nb_sectors, id, private);
/*We store a local record of the request*/
- qcow_idx = get_free_idx(s);
while (nb_sectors > 0) {
index_in_cluster = sector & (s->cluster_sectors - 1);
n = s->cluster_sectors - index_in_cluster;
if (n > nb_sectors)
n = nb_sectors;
- if (s->iocb_free_count == 0 || !aio_lock(s, sector)){
- DPRINTF("AIO_LOCK or iocb_free_count (%d) failed"
- "[%llu]\n", s->iocb_free_count,
- (long long) sector);
- return -ENOMEM;
- }
-
- if (!IS_ZERO(buf,n * 512)) {
+ if (s->iocb_free_count == 0 || !aio_lock(s, sector))
+ return cb(dd, -EBUSY, sector, nb_sectors, id, private);
- cluster_offset = get_cluster_offset(bs, sector << 9,
- 1, 0,
- index_in_cluster,
- index_in_cluster+n
- );
- if (!cluster_offset) {
- DPRINTF("Ooops, no write cluster offset!\n");
- ret = -1;
- goto done;
- }
+ cluster_offset = get_cluster_offset(s, sector << 9, 1, 0,
+ index_in_cluster,
+ index_in_cluster+n);
+ if (!cluster_offset) {
+ DPRINTF("Ooops, no write cluster offset!\n");
+ return cb(dd, -EIO, sector, nb_sectors, id, private);
+ }
- if (s->crypt_method) {
- encrypt_sectors(s, sector, s->cluster_data,
- (unsigned char *)buf, n, 1,
- &s->aes_encrypt_key);
- s->nr_reqs[qcow_idx]++;
- asubmit += async_write(s, s->fd, n * 512,
- (cluster_offset +
- index_in_cluster*512),
- (char *)s->cluster_data,
- cb, id, sector,
- qcow_idx, private);
- } else {
- s->nr_reqs[qcow_idx]++;
- asubmit += async_write(s, s->fd, n * 512,
- (cluster_offset +
- index_in_cluster*512),
- buf, cb, id, sector,
- qcow_idx, private);
- }
+ if (s->crypt_method) {
+ encrypt_sectors(s, sector, s->cluster_data,
+ (unsigned char *)buf, n, 1,
+ &s->aes_encrypt_key);
+ async_write(s, n * 512,
+ (cluster_offset + index_in_cluster*512),
+ (char *)s->cluster_data, cb, id, sector,
+ private);
} else {
- /*Write data contains zeros, but we must check to see
- if cluster already allocated*/
- cluster_offset = get_cluster_offset(bs, sector << 9,
- 0, 0,
- index_in_cluster,
- index_in_cluster+n
- );
- if(cluster_offset) {
- if (s->crypt_method) {
- encrypt_sectors(s, sector,
- s->cluster_data,
- (unsigned char *)buf,
- n, 1,
- &s->aes_encrypt_key);
- s->nr_reqs[qcow_idx]++;
- asubmit += async_write(s, s->fd,
- n * 512,
- (cluster_offset+
- index_in_cluster * 512),
- (char *)s->cluster_data, cb, id, sector,
- qcow_idx, private);
- } else {
- s->nr_reqs[qcow_idx]++;
- asubmit += async_write(s, s->fd, n*512,
- cluster_offset + index_in_cluster * 512,
- buf, cb, id, sector,
- qcow_idx, private);
- }
- }
- else aio_unlock(s, sector);
+ async_write(s, n * 512,
+ (cluster_offset + index_in_cluster*512),
+ buf, cb, id, sector, private);
}
+
nb_sectors -= n;
sector += n;
buf += n * 512;
}
s->cluster_cache_offset = -1; /* disable compressed cache */
-done:
- /*Callback if no async requests outstanding*/
- if (!asubmit) return cb(bs, ret == -1 ? -1 : 0, id, private);
-
return 0;
}
-int tdqcow_submit(struct td_state *bs)
+int tdqcow_submit(struct disk_driver *dd)
{
int ret;
- struct tdqcow_state *prv = (struct tdqcow_state *)bs->private;
+ struct tdqcow_state *prv = (struct tdqcow_state *)dd->private;
+
+ if (!prv->iocb_queued)
+ return 0;
- ret = io_submit(prv->aio_ctx, prv->iocb_queued, prv->iocb_queue);
+ ret = io_submit(prv->aio_ctx, prv->iocb_queued, prv->iocb_queue);
/* XXX: TODO: Handle error conditions here. */
/* Success case: */
prv->iocb_queued = 0;
- return ret;
-}
-
-
-int *tdqcow_get_fd(struct td_state *bs)
-{
- struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
- int *fds, i;
-
- fds = malloc(sizeof(int) * MAX_IOFD);
- /*initialise the FD array*/
- for(i=0;i<MAX_IOFD;i++) fds[i] = 0;
-
- fds[0] = s->poll_fd;
- return fds;
+ return 0;
}
-int tdqcow_close(struct td_state *bs)
+int tdqcow_close(struct disk_driver *dd)
{
- struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
+ struct tdqcow_state *s = (struct tdqcow_state *)dd->private;
uint32_t cksum, out;
int fd, offset;
close(fd);
}
+ io_destroy(s->aio_ctx);
free(s->name);
free(s->l1_table);
free(s->l2_cache);
return 0;
}
-int tdqcow_do_callbacks(struct td_state *s, int sid)
+int tdqcow_do_callbacks(struct disk_driver *dd, int sid)
{
int ret, i, rsp = 0,*ptr;
struct io_event *ep;
- struct tdqcow_state *prv = (struct tdqcow_state *)s->private;
+ struct tdqcow_state *prv = (struct tdqcow_state *)dd->private;
if (sid > MAX_IOFD) return 1;
ret = io_getevents(prv->aio_ctx, 0, MAX_AIO_REQS, prv->aio_events,
NULL);
- for (ep=prv->aio_events, i = ret; i-->0; ep++) {
+ for (ep = prv->aio_events, i = ret; i-- > 0; ep++) {
struct iocb *io = ep->obj;
struct pending_aio *pio;
pio = &prv->pending_aio[(long)io->data];
aio_unlock(prv, pio->sector);
- if (pio->id >= 0) {
- if (prv->crypt_method)
- encrypt_sectors(prv, pio->sector,
- (unsigned char *)pio->buf,
- (unsigned char *)pio->buf,
- pio->nb_sectors, 0,
- &prv->aes_decrypt_key);
- prv->nr_reqs[pio->qcow_idx]--;
- if (prv->nr_reqs[pio->qcow_idx] == 0)
- rsp += pio->cb(s, ep->res == io->u.c.nbytes ? 0 : 1, pio->id,
- pio->private);
- } else if (pio->id == -2) free(pio->buf);
+
+ if (prv->crypt_method)
+ encrypt_sectors(prv, pio->sector,
+ (unsigned char *)pio->buf,
+ (unsigned char *)pio->buf,
+ pio->nb_sectors, 0,
+ &prv->aes_decrypt_key);
+
+ rsp += pio->cb(dd, ep->res == io->u.c.nbytes ? 0 : 1,
+ pio->sector, pio->nb_sectors,
+ pio->id, pio->private);
prv->iocb_free[prv->iocb_free_count++] = io;
}
}
int qcow_create(const char *filename, uint64_t total_size,
- const char *backing_file, int sparse)
+ const char *backing_file, int sparse)
{
int fd, header_size, backing_filename_len, l1_size, i;
int shift, length, adjust, flags = 0, ret = 0;
return 0;
}
-int qcow_make_empty(struct td_state *bs)
+int qcow_make_empty(struct tdqcow_state *s)
{
- struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
uint32_t l1_length = s->l1_size * sizeof(uint64_t);
memset(s->l1_table, 0, l1_length);
return 0;
}
-int qcow_get_cluster_size(struct td_state *bs)
+int qcow_get_cluster_size(struct tdqcow_state *s)
{
- struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
-
return s->cluster_size;
}
/* XXX: put compressed sectors first, then all the cluster aligned
tables to avoid losing bytes in alignment */
-int qcow_compress_cluster(struct td_state *bs, int64_t sector_num,
+int qcow_compress_cluster(struct tdqcow_state *s, int64_t sector_num,
const uint8_t *buf)
{
- struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
z_stream strm;
int ret, out_len;
uint8_t *out_buf;
/* could not compress: write normal cluster */
//tdqcow_queue_write(bs, sector_num, buf, s->cluster_sectors);
} else {
- cluster_offset = get_cluster_offset(bs, sector_num << 9, 2,
+ cluster_offset = get_cluster_offset(s, sector_num << 9, 2,
out_len, 0, 0);
cluster_offset &= s->cluster_offset_mask;
lseek(s->fd, cluster_offset, SEEK_SET);
return 0;
}
+int tdqcow_has_parent(struct disk_driver *dd)
+{
+ struct tdqcow_state *s = (struct tdqcow_state *)dd->private;
+ return (s->backing_file_offset ? 1 : 0);
+}
+
+int tdqcow_get_parent(struct disk_driver *cdd, struct disk_driver *pdd)
+{
+ off_t off;
+ char *buf, *filename;
+ int len, secs, ret = -1;
+ struct tdqcow_state *child = (struct tdqcow_state *)cdd->private;
+
+ if (!child->backing_file_offset)
+ return -1;
+
+ /* read the backing file name */
+ len = child->backing_file_size;
+ off = child->backing_file_offset - (child->backing_file_offset % 512);
+ secs = (len + (child->backing_file_offset - off) + 511) >> 9;
+
+ if (posix_memalign((void **)&buf, 512, secs << 9))
+ return -1;
+
+ if (lseek(child->fd, off, SEEK_SET) == (off_t)-1)
+ goto out;
+
+ if (read(child->fd, buf, secs << 9) != secs << 9)
+ goto out;
+ filename = buf + (child->backing_file_offset - off);
+ filename[len] = '\0';
+
+ /*Open backing file*/
+ ret = tdqcow_open(pdd, filename);
+ out:
+ free(buf);
+ return ret;
+}
+
struct tap_disk tapdisk_qcow = {
- "tapdisk_qcow",
- sizeof(struct tdqcow_state),
- tdqcow_open,
- tdqcow_queue_read,
- tdqcow_queue_write,
- tdqcow_submit,
- tdqcow_get_fd,
- tdqcow_close,
- tdqcow_do_callbacks,
+ .disk_type = "tapdisk_qcow",
+ .private_data_size = sizeof(struct tdqcow_state),
+ .td_open = tdqcow_open,
+ .td_queue_read = tdqcow_queue_read,
+ .td_queue_write = tdqcow_queue_write,
+ .td_submit = tdqcow_submit,
+ .td_has_parent = tdqcow_has_parent,
+ .td_get_parent = tdqcow_get_parent,
+ .td_close = tdqcow_close,
+ .td_do_callbacks = tdqcow_do_callbacks,
};
-
return 0;
}
+static inline void init_fds(struct disk_driver *dd)
+{
+ int i;
+ struct tdram_state *prv = (struct tdram_state *)dd->private;
+
+ for(i =0 ; i < MAX_IOFD; i++)
+ dd->io_fd[i] = 0;
+
+ dd->io_fd[0] = prv->poll_pipe[0];
+}
+
/* Open the disk file and initialize ram state. */
-int tdram_open (struct td_state *s, const char *name)
+int tdram_open (struct disk_driver *dd, const char *name)
{
- int i, fd, ret = 0, count = 0;
- struct tdram_state *prv = (struct tdram_state *)s->private;
- uint64_t size;
char *p;
- s->private = prv;
+ uint64_t size;
+ int i, fd, ret = 0, count = 0;
+ struct td_state *s = dd->td_state;
+ struct tdram_state *prv = (struct tdram_state *)dd->private;
connections++;
ret = 0;
}
+ init_fds(dd);
done:
return ret;
}
- int tdram_queue_read(struct td_state *s, uint64_t sector,
- int nb_sectors, char *buf, td_callback_t cb,
- int id, void *private)
+ int tdram_queue_read(struct disk_driver *dd, uint64_t sector,
+ int nb_sectors, char *buf, td_callback_t cb,
+ int id, void *private)
{
- struct tdram_state *prv = (struct tdram_state *)s->private;
+ struct td_state *s = dd->td_state;
+ struct tdram_state *prv = (struct tdram_state *)dd->private;
int size = nb_sectors * s->sector_size;
uint64_t offset = sector * (uint64_t)s->sector_size;
- int ret;
memcpy(buf, img + offset, size);
- ret = size;
- cb(s, (ret < 0) ? ret: 0, id, private);
-
- return ret;
+ return cb(dd, 0, sector, nb_sectors, id, private);
}
- int tdram_queue_write(struct td_state *s, uint64_t sector,
- int nb_sectors, char *buf, td_callback_t cb,
- int id, void *private)
+int tdram_queue_write(struct disk_driver *dd, uint64_t sector,
+ int nb_sectors, char *buf, td_callback_t cb,
+ int id, void *private)
{
- struct tdram_state *prv = (struct tdram_state *)s->private;
+ struct td_state *s = dd->td_state;
+ struct tdram_state *prv = (struct tdram_state *)dd->private;
int size = nb_sectors * s->sector_size;
uint64_t offset = sector * (uint64_t)s->sector_size;
- int ret;
- /*We assume that write access is controlled at a higher level for multiple disks*/
+ /* We assume that write access is controlled
+ * at a higher level for multiple disks */
memcpy(img + offset, buf, size);
- ret = size;
-
- cb(s, (ret < 0) ? ret : 0, id, private);
- return ret;
+ return cb(dd, 0, sector, nb_sectors, id, private);
}
-int tdram_submit(struct td_state *s)
+int tdram_submit(struct disk_driver *dd)
{
return 0;
}
-
-int *tdram_get_fd(struct td_state *s)
+int tdram_close(struct disk_driver *dd)
{
- struct tdram_state *prv = (struct tdram_state *)s->private;
- int *fds, i;
-
- fds = malloc(sizeof(int) * MAX_IOFD);
- /*initialise the FD array*/
- for(i=0;i<MAX_IOFD;i++) fds[i] = 0;
-
- fds[0] = prv->poll_pipe[0];
- return fds;
-}
-
-int tdram_close(struct td_state *s)
-{
- struct tdram_state *prv = (struct tdram_state *)s->private;
+ struct tdram_state *prv = (struct tdram_state *)dd->private;
connections--;
return 0;
}
-int tdram_do_callbacks(struct td_state *s, int sid)
+int tdram_do_callbacks(struct disk_driver *dd, int sid)
{
/* always ask for a kick */
return 1;
}
+int tdram_has_parent(struct disk_driver *dd)
+{
+ return 0;
+}
+
+int tdram_get_parent(struct disk_driver *dd, struct disk_driver *parent)
+{
+ return -EINVAL;
+}
+
struct tap_disk tapdisk_ram = {
- "tapdisk_ram",
- sizeof(struct tdram_state),
- tdram_open,
- tdram_queue_read,
- tdram_queue_write,
- tdram_submit,
- tdram_get_fd,
- tdram_close,
- tdram_do_callbacks,
+ .disk_type = "tapdisk_ram",
+ .private_data_size = sizeof(struct tdram_state),
+ .td_open = tdram_open,
+ .td_queue_read = tdram_queue_read,
+ .td_queue_write = tdram_queue_write,
+ .td_submit = tdram_submit,
+ .td_has_parent = tdram_has_parent,
+ .td_get_parent = tdram_get_parent,
+ .td_close = tdram_close,
+ .td_do_callbacks = tdram_do_callbacks,
};
-
return 0;
}
+static inline void init_fds(struct disk_driver *dd)
+{
+ int i;
+ struct tdsync_state *prv = (struct tdsync_state *)dd->private;
+
+ for(i = 0; i < MAX_IOFD; i++)
+ dd->io_fd[i] = 0;
+
+ dd->io_fd[0] = prv->poll_pipe[0];
+}
+
/* Open the disk file and initialize aio state. */
-int tdsync_open (struct td_state *s, const char *name)
+int tdsync_open (struct disk_driver *dd, const char *name)
{
int i, fd, ret = 0;
- struct tdsync_state *prv = (struct tdsync_state *)s->private;
- s->private = prv;
+ struct td_state *s = dd->td_state;
+ struct tdsync_state *prv = (struct tdsync_state *)dd->private;
/* set up a pipe so that we can hand back a poll fd that won't fire.*/
ret = pipe(prv->poll_pipe);
prv->fd = fd;
+ init_fds(dd);
ret = get_image_info(s, fd);
done:
return ret;
}
- int tdsync_queue_read(struct td_state *s, uint64_t sector,
+ int tdsync_queue_read(struct disk_driver *dd, uint64_t sector,
int nb_sectors, char *buf, td_callback_t cb,
int id, void *private)
{
- struct tdsync_state *prv = (struct tdsync_state *)s->private;
+ struct td_state *s = dd->td_state;
+ struct tdsync_state *prv = (struct tdsync_state *)dd->private;
int size = nb_sectors * s->sector_size;
uint64_t offset = sector * (uint64_t)s->sector_size;
int ret;
}
} else ret = 0 - errno;
- cb(s, (ret < 0) ? ret: 0, id, private);
-
- return 1;
+ return cb(dd, (ret < 0) ? ret: 0, sector, nb_sectors, id, private);
}
- int tdsync_queue_write(struct td_state *s, uint64_t sector,
+ int tdsync_queue_write(struct disk_driver *dd, uint64_t sector,
int nb_sectors, char *buf, td_callback_t cb,
int id, void *private)
{
- struct tdsync_state *prv = (struct tdsync_state *)s->private;
+ struct td_state *s = dd->td_state;
+ struct tdsync_state *prv = (struct tdsync_state *)dd->private;
int size = nb_sectors * s->sector_size;
uint64_t offset = sector * (uint64_t)s->sector_size;
int ret = 0;
}
} else ret = 0 - errno;
- cb(s, (ret < 0) ? ret : 0, id, private);
-
- return 1;
+ return cb(dd, (ret < 0) ? ret : 0, sector, nb_sectors, id, private);
}
-int tdsync_submit(struct td_state *s)
+int tdsync_submit(struct disk_driver *dd)
{
return 0;
}
-
-int *tdsync_get_fd(struct td_state *s)
-{
- struct tdsync_state *prv = (struct tdsync_state *)s->private;
-
- int *fds, i;
-
- fds = malloc(sizeof(int) * MAX_IOFD);
- /*initialise the FD array*/
- for(i=0;i<MAX_IOFD;i++) fds[i] = 0;
-
- fds[0] = prv->poll_pipe[0];
- return fds;
-}
-
-int tdsync_close(struct td_state *s)
+int tdsync_close(struct disk_driver *dd)
{
- struct tdsync_state *prv = (struct tdsync_state *)s->private;
+ struct tdsync_state *prv = (struct tdsync_state *)dd->private;
close(prv->fd);
close(prv->poll_pipe[0]);
return 0;
}
-int tdsync_do_callbacks(struct td_state *s, int sid)
+int tdsync_do_callbacks(struct disk_driver *dd, int sid)
{
/* always ask for a kick */
return 1;
}
+int tdsync_has_parent(struct disk_driver *dd)
+{
+ return 0;
+}
+
+int tdsync_get_parent(struct disk_driver *dd, struct disk_driver *parent)
+{
+ return -EINVAL;
+}
+
struct tap_disk tapdisk_sync = {
- "tapdisk_sync",
- sizeof(struct tdsync_state),
- tdsync_open,
- tdsync_queue_read,
- tdsync_queue_write,
- tdsync_submit,
- tdsync_get_fd,
- tdsync_close,
- tdsync_do_callbacks,
+ .disk_type = "tapdisk_sync",
+ .private_data_size = sizeof(struct tdsync_state),
+ .td_open = tdsync_open,
+ .td_queue_read = tdsync_queue_read,
+ .td_queue_write = tdsync_queue_write,
+ .td_submit = tdsync_submit,
+ .td_has_parent = tdsync_has_parent,
+ .td_get_parent = tdsync_get_parent,
+ .td_close = tdsync_close,
+ .td_do_callbacks = tdsync_do_callbacks,
};
-
unsigned int cluster_sectors;
};
+static inline void init_fds(struct disk_driver *dd)
+{
+ int i;
+ struct tdvmdk_state *prv = (struct tdvmdk_state *)dd->private;
+
+ for (i = 0; i < MAX_IOFD; i++)
+ dd->io_fd[i] = 0;
+
+ dd->io_fd[0] = prv->poll_pipe[0];
+}
/* Open the disk file and initialize aio state. */
-static int tdvmdk_open (struct td_state *s, const char *name)
+static int tdvmdk_open (struct disk_driver *dd, const char *name)
{
int ret, fd;
int l1_size, i;
uint32_t magic;
- struct tdvmdk_state *prv = (struct tdvmdk_state *)s->private;
+ struct td_state *s = dd->td_state;
+ struct tdvmdk_state *prv = (struct tdvmdk_state *)dd->private;
/* set up a pipe so that we can hand back a poll fd that won't fire.*/
ret = pipe(prv->poll_pipe);
if (!prv->l2_cache)
goto fail;
prv->fd = fd;
+ init_fds(dd);
DPRINTF("VMDK File opened successfully\n");
return 0;
return -1;
}
-static uint64_t get_cluster_offset(struct td_state *s,
+static uint64_t get_cluster_offset(struct tdvmdk_state *prv,
uint64_t offset, int allocate)
{
- struct tdvmdk_state *prv = (struct tdvmdk_state *)s->private;
unsigned int l1_index, l2_offset, l2_index;
int min_index, i, j;
uint32_t min_count, *l2_table, tmp;
return cluster_offset;
}
-static int tdvmdk_queue_read(struct td_state *s, uint64_t sector,
+static int tdvmdk_queue_read(struct disk_driver *dd, uint64_t sector,
int nb_sectors, char *buf, td_callback_t cb,
int id, void *private)
{
- struct tdvmdk_state *prv = (struct tdvmdk_state *)s->private;
+ struct tdvmdk_state *prv = (struct tdvmdk_state *)dd->private;
int index_in_cluster, n;
uint64_t cluster_offset;
int ret = 0;
+
while (nb_sectors > 0) {
- cluster_offset = get_cluster_offset(s, sector << 9, 0);
+ cluster_offset = get_cluster_offset(prv, sector << 9, 0);
index_in_cluster = sector % prv->cluster_sectors;
n = prv->cluster_sectors - index_in_cluster;
if (n > nb_sectors)
buf += n * 512;
}
done:
- cb(s, ret == -1 ? -1 : 0, id, private);
-
- return 1;
+ return cb(dd, ret == -1 ? -1 : 0, sector, nb_sectors, id, private);
}
-static int tdvmdk_queue_write(struct td_state *s, uint64_t sector,
+static int tdvmdk_queue_write(struct disk_driver *dd, uint64_t sector,
int nb_sectors, char *buf, td_callback_t cb,
int id, void *private)
{
- struct tdvmdk_state *prv = (struct tdvmdk_state *)s->private;
+ struct tdvmdk_state *prv = (struct tdvmdk_state *)dd->private;
int index_in_cluster, n;
uint64_t cluster_offset;
int ret = 0;
-
while (nb_sectors > 0) {
index_in_cluster = sector & (prv->cluster_sectors - 1);
n = prv->cluster_sectors - index_in_cluster;
if (n > nb_sectors)
n = nb_sectors;
- cluster_offset = get_cluster_offset(s, sector << 9, 1);
+ cluster_offset = get_cluster_offset(prv, sector << 9, 1);
if (!cluster_offset) {
ret = -1;
goto done;
buf += n * 512;
}
done:
- cb(s, ret == -1 ? -1 : 0, id, private);
-
- return 1;
+ return cb(dd, ret == -1 ? -1 : 0, sector, nb_sectors, id, private);
}
-static int tdvmdk_submit(struct td_state *s)
+static int tdvmdk_submit(struct disk_driver *dd)
{
return 0;
}
-
-static int *tdvmdk_get_fd(struct td_state *s)
-{
- struct tdvmdk_state *prv = (struct tdvmdk_state *)s->private;
- int *fds, i;
-
- fds = malloc(sizeof(int) * MAX_IOFD);
- /*initialise the FD array*/
- for (i=0;i<MAX_IOFD;i++) fds[i] = 0;
-
- fds[0] = prv->poll_pipe[0];
- return fds;
-}
-
-static int tdvmdk_close(struct td_state *s)
+static int tdvmdk_close(struct disk_driver *dd)
{
- struct tdvmdk_state *prv = (struct tdvmdk_state *)s->private;
+ struct tdvmdk_state *prv = (struct tdvmdk_state *)dd->private;
safer_free(prv->l1_table);
safer_free(prv->l1_backup_table);
return 0;
}
-static int tdvmdk_do_callbacks(struct td_state *s, int sid)
+static int tdvmdk_do_callbacks(struct disk_driver *dd, int sid)
{
/* always ask for a kick */
return 1;
}
+static int tdvmdk_has_parent(struct disk_driver *dd)
+{
+ return 0;
+}
+
+static int tdvmdk_get_parent(struct disk_driver *dd, struct disk_driver *parent)
+{
+ return -EINVAL;
+}
+
struct tap_disk tapdisk_vmdk = {
- "tapdisk_vmdk",
- sizeof(struct tdvmdk_state),
- tdvmdk_open,
- tdvmdk_queue_read,
- tdvmdk_queue_write,
- tdvmdk_submit,
- tdvmdk_get_fd,
- tdvmdk_close,
- tdvmdk_do_callbacks,
+ .disk_type = "tapdisk_vmdk",
+ .private_data_size = sizeof(struct tdvmdk_state),
+ .td_open = tdvmdk_open,
+ .td_queue_read = tdvmdk_queue_read,
+ .td_queue_write = tdvmdk_queue_write,
+ .td_submit = tdvmdk_submit,
+ .td_has_parent = tdvmdk_has_parent,
+ .td_get_parent = tdvmdk_get_parent,
+ .td_close = tdvmdk_close,
+ .td_do_callbacks = tdvmdk_do_callbacks,
};
-
return 0;
}
-static int send_responses(struct td_state *s, int res, int idx, void *private)
+static int send_responses(struct disk_driver *dd, int res, uint64_t sec,
+ int nr_secs, int idx, void *private)
{
if (res < 0) DFPRINTF("AIO FAILURE: res [%d]!\n",res);
int main(int argc, char *argv[])
{
- struct tap_disk *drv;
+ struct disk_driver dd;
struct td_state *s;
int ret = -1, fd, len;
fd_set readfds;
} else DFPRINTF("Qcow file created: size %llu sectors\n",
(long long unsigned)s->size);
- drv = &tapdisk_qcow;
- s->private = malloc(drv->private_data_size);
+ dd.td_state = s;
+ dd.drv = &tapdisk_qcow;
+ dd.private = malloc(dd.drv->private_data_size);
/*Open qcow file*/
- if (drv->td_open(s, argv[1])!=0) {
+ if (dd.drv->td_open(&dd, argv[1])!=0) {
DFPRINTF("Unable to open Qcow file [%s]\n",argv[1]);
exit(-1);
}
- io_fd = drv->td_get_fd(s);
+ io_fd = dd.io_fd;
/*Initialise the output string*/
memset(output,0x20,25);
len = (len >> 9) << 9;
}
- ret = drv->td_queue_write(s, i >> 9,
- len >> 9, buf,
- send_responses, 0, buf);
+ ret = dd.drv->td_queue_write(&dd, i >> 9,
+ len >> 9, buf,
+ send_responses, 0, buf);
if (!ret) submit_events++;
debug_output(i,s->size << 9);
if ((submit_events % 10 == 0) || complete)
- drv->td_submit(s);
+ dd.drv->td_submit(&dd);
timeout.tv_usec = 0;
} else {
ret = select(maxfds + 1, &readfds, (fd_set *) 0,
(fd_set *) 0, &timeout);
- if (ret > 0) drv->td_do_callbacks(s, 0);
+ if (ret > 0) dd.drv->td_do_callbacks(&dd, 0);
if (complete && (returned_events == submit_events))
running = 0;
}
memcpy(output+prev+1,"=",1);
DFPRINTF("\r%s 100%%\nTRANSFER COMPLETE\n\n", output);
- drv->td_close(s);
- free(s->private);
+ dd.drv->td_close(&dd);
+ free(dd.private);
free(s);
return 0;
static int returned_read_events = 0, returned_write_events = 0;
static int submit_events = 0;
static uint32_t read_idx = 0, write_idx = 0;
-struct tap_disk *drv1, *drv2;
-struct td_state *sqcow, *saio;
+struct disk_driver ddqcow, ddaio;
static uint64_t prev = 0, written = 0;
static char output[25];
return;
}
-static int send_write_responses(struct td_state *s, int res, int idx, void *private)
+static int send_write_responses(struct disk_driver *dd, int res, uint64_t sec,
+ int nr_secs, int idx, void *private)
{
if (res < 0) {
DFPRINTF("AIO FAILURE: res [%d]!\n",res);
if (complete && (returned_write_events == submit_events))
write_complete = 1;
- debug_output(written, s->size << 9);
+ debug_output(written, dd->td_state->size << 9);
free(private);
return 0;
}
-static int send_read_responses(struct td_state *s, int res, int idx, void *private)
+static int send_read_responses(struct disk_driver *dd, int res, uint64_t sec,
+ int nr_secs, int idx, void *private)
{
int ret;
if (complete && (returned_read_events == submit_events))
read_complete = 1;
- ret = drv2->td_queue_write(saio, idx, BLOCK_PROCESSSZ>>9, private,
- send_write_responses, idx, private);
+ ret = ddaio.drv->td_queue_write(&ddaio, idx, BLOCK_PROCESSSZ>>9, private,
+ send_write_responses, idx, private);
if (ret != 0) {
DFPRINTF("ERROR in submitting queue write!\n");
return 0;
if ( (complete && returned_read_events == submit_events) ||
(returned_read_events % 10 == 0) ) {
- drv2->td_submit(saio);
+ ddaio.drv->td_submit(&ddaio);
}
return 0;
exit(-1);
}
- sqcow = malloc(sizeof(struct td_state));
- saio = malloc(sizeof(struct td_state));
+ ddqcow.td_state = malloc(sizeof(struct td_state));
+ ddaio.td_state = malloc(sizeof(struct td_state));
/*Open qcow source file*/
- drv1 = &tapdisk_qcow;
- sqcow->private = malloc(drv1->private_data_size);
+ ddqcow.drv = &tapdisk_qcow;
+ ddqcow.private = malloc(ddqcow.drv->private_data_size);
- if (drv1->td_open(sqcow, argv[2])!=0) {
+ if (ddqcow.drv->td_open(&ddqcow, argv[2])!=0) {
DFPRINTF("Unable to open Qcow file [%s]\n",argv[2]);
exit(-1);
} else DFPRINTF("QCOW file opened, size %llu\n",
- (long long unsigned)sqcow->size);
+ (long long unsigned)ddqcow.td_state->size);
- qcowio_fd = drv1->td_get_fd(sqcow);
+ qcowio_fd = ddqcow.io_fd;
/*Setup aio destination file*/
ret = stat(argv[1],&finfo);
argv[1], 0 - errno);
exit(-1);
}
- if (ftruncate(fd, (off_t)sqcow->size<<9) < 0) {
+ if (ftruncate(fd, (off_t)ddqcow.td_state->size<<9) < 0) {
DFPRINTF("Unable to create file "
"[%s] of size %llu (errno %d). "
"Exiting...\n",
argv[1],
- (long long unsigned)sqcow->size<<9,
+ (long long unsigned)ddqcow.td_state->size<<9,
0 - errno);
close(fd);
exit(-1);
close(fd);
exit(-1);
}
- if (size < sqcow->size<<9) {
+ if (size < ddqcow.td_state->size<<9) {
DFPRINTF("ERROR: Not enough space on device "
"%s (%lu bytes available, %llu bytes required\n",
argv[1], size,
- (long long unsigned)sqcow->size<<9);
+ (long long unsigned)ddqcow.td_state->size<<9);
close(fd);
exit(-1);
}
} else {
- if (ftruncate(fd, (off_t)sqcow->size<<9) < 0) {
+ if (ftruncate(fd, (off_t)ddqcow.td_state->size<<9) < 0) {
DFPRINTF("Unable to create file "
"[%s] of size %llu (errno %d). "
"Exiting...\n",
argv[1],
- (long long unsigned)sqcow->size<<9,
+ (long long unsigned)ddqcow.td_state->size<<9,
0 - errno);
close(fd);
exit(-1);
} else DFPRINTF("File [%s] truncated to length %llu "
"(%llu)\n",
argv[1],
- (long long unsigned)sqcow->size<<9,
- (long long unsigned)sqcow->size);
+ (long long unsigned)ddqcow.td_state->size<<9,
+ (long long unsigned)ddqcow.td_state->size);
}
close(fd);
}
/*Open aio destination file*/
- drv2 = &tapdisk_aio;
- saio->private = malloc(drv2->private_data_size);
+ ddaio.drv = &tapdisk_aio;
+ ddaio.private = malloc(ddaio.drv->private_data_size);
- if (drv2->td_open(saio, argv[1])!=0) {
+ if (ddaio.drv->td_open(&ddaio, argv[1])!=0) {
DFPRINTF("Unable to open Qcow file [%s]\n", argv[1]);
exit(-1);
}
- aio_fd = drv2->td_get_fd(saio);
+ aio_fd = ddaio.io_fd;
/*Initialise the output string*/
memset(output,0x20,25);
}
/*Attempt to read 4k sized blocks*/
- ret = drv1->td_queue_read(sqcow, i>>9,
- BLOCK_PROCESSSZ>>9, buf,
- send_read_responses, i>>9, buf);
+ ret = ddqcow.drv->td_queue_read(&ddqcow, i>>9,
+ BLOCK_PROCESSSZ>>9, buf,
+ send_read_responses, i>>9, buf);
if (ret < 0) {
DFPRINTF("UNABLE TO READ block [%llu]\n",
submit_events++;
}
- if (i >= sqcow->size<<9) {
+ if (i >= ddqcow.td_state->size<<9) {
complete = 1;
}
if ((submit_events % 10 == 0) || complete)
- drv1->td_submit(sqcow);
+ ddqcow.drv->td_submit(&ddqcow);
timeout.tv_usec = 0;
} else {
if (ret > 0) {
if (FD_ISSET(qcowio_fd[0], &readfds))
- drv1->td_do_callbacks(sqcow, 0);
+ ddqcow.drv->td_do_callbacks(&ddqcow, 0);
if (FD_ISSET(aio_fd[0], &readfds))
- drv2->td_do_callbacks(saio, 0);
+ ddaio.drv->td_do_callbacks(&ddaio, 0);
}
if (complete && (returned_write_events == submit_events))
running = 0;
int connected_disks = 0;
fd_list_entry_t *fd_start = NULL;
+int do_cow_read(struct disk_driver *dd, blkif_request_t *req,
+ int sidx, uint64_t sector, int nr_secs);
+
+#define td_for_each_disk(tds, drv) \
+ for (drv = tds->disks; drv != NULL; drv = drv->next)
+
void usage(void)
{
fprintf(stderr, "blktap-utils: v1.0.0\n");
static void unmap_disk(struct td_state *s)
{
tapdev_info_t *info = s->ring_info;
- struct tap_disk *drv = s->drv;
+ struct disk_driver *dd, *tmp;
fd_list_entry_t *entry;
- drv->td_close(s);
+ dd = s->disks;
+ while (dd) {
+ tmp = dd->next;
+ dd->drv->td_close(dd);
+ free(dd->private);
+ free(dd);
+ dd = tmp;
+ }
if (info != NULL && info->mem > 0)
munmap(info->mem, getpagesize() * BLKTAP_MMAP_REGION_SIZE);
free(s->fd_entry);
free(s->blkif);
free(s->ring_info);
- free(s->private);
free(s);
return;
static inline int LOCAL_FD_SET(fd_set *readfds)
{
fd_list_entry_t *ptr;
+ struct disk_driver *dd;
ptr = fd_start;
while (ptr != NULL) {
if (ptr->tap_fd) {
FD_SET(ptr->tap_fd, readfds);
- if (ptr->io_fd[READ])
- FD_SET(ptr->io_fd[READ], readfds);
- maxfds = (ptr->io_fd[READ] > maxfds ?
- ptr->io_fd[READ]: maxfds);
- maxfds = (ptr->tap_fd > maxfds ? ptr->tap_fd: maxfds);
+ td_for_each_disk(ptr->s, dd) {
+ if (dd->io_fd[READ])
+ FD_SET(dd->io_fd[READ], readfds);
+ maxfds = (dd->io_fd[READ] > maxfds ?
+ dd->io_fd[READ] : maxfds);
+ }
+ maxfds = (ptr->tap_fd > maxfds ? ptr->tap_fd : maxfds);
}
ptr = ptr->next;
}
return 0;
}
-static inline fd_list_entry_t *add_fd_entry(
- int tap_fd, int io_fd[MAX_IOFD], struct td_state *s)
+static inline fd_list_entry_t *add_fd_entry(int tap_fd, struct td_state *s)
{
fd_list_entry_t **pprev, *entry;
int i;
DPRINTF("Adding fd_list_entry\n");
/*Add to linked list*/
- s->fd_entry = entry = malloc(sizeof(fd_list_entry_t));
+ s->fd_entry = entry = malloc(sizeof(fd_list_entry_t));
entry->tap_fd = tap_fd;
- for (i = 0; i < MAX_IOFD; i++)
- entry->io_fd[i] = io_fd[i];
- entry->s = s;
- entry->next = NULL;
+ entry->s = s;
+ entry->next = NULL;
pprev = &fd_start;
while (*pprev != NULL)
static struct tap_disk *get_driver(int drivertype)
{
/* blktapctrl has passed us the driver type */
-
+
return dtypes[drivertype]->drv;
}
s = malloc(sizeof(struct td_state));
blkif = s->blkif = malloc(sizeof(blkif_t));
- s->ring_info = malloc(sizeof(tapdev_info_t));
+ s->ring_info = calloc(1, sizeof(tapdev_info_t));
- for (i = 0; i < MAX_REQUESTS; i++)
- blkif->pending_list[i].count = 0;
+ for (i = 0; i < MAX_REQUESTS; i++) {
+ blkif->pending_list[i].secs_pending = 0;
+ blkif->pending_list[i].submitting = 0;
+ }
return s;
}
+static struct disk_driver *disk_init(struct td_state *s, struct tap_disk *drv)
+{
+ struct disk_driver *dd;
+
+ dd = calloc(1, sizeof(struct disk_driver));
+ if (!dd)
+ return NULL;
+
+ dd->private = malloc(drv->private_data_size);
+ if (!dd->private) {
+ free(dd);
+ return NULL;
+ }
+
+ dd->drv = drv;
+ dd->td_state = s;
+
+ return dd;
+}
+
static int map_new_dev(struct td_state *s, int minor)
{
int tap_fd;
return -1;
}
+static int open_disk(struct td_state *s, struct disk_driver *dd, char *path)
+{
+ int err;
+ struct disk_driver *d = dd;
+
+ err = dd->drv->td_open(dd, path);
+ if (err)
+ return err;
+
+ /* load backing files as necessary */
+ while (d->drv->td_has_parent(d)) {
+ struct disk_driver *new;
+
+ new = calloc(1, sizeof(struct disk_driver));
+ if (!new)
+ goto fail;
+ new->drv = d->drv;
+ new->td_state = s;
+ new->private = malloc(new->drv->private_data_size);
+ if (!new->private) {
+ free(new);
+ goto fail;
+ }
+
+ err = d->drv->td_get_parent(d, new);
+ if (err)
+ goto fail;
+
+ d = d->next = new;
+ }
+
+ return 0;
+
+ fail:
+ DPRINTF("failed opening disk\n");
+ while (dd) {
+ d = dd->next;
+ dd->drv->td_close(dd);
+ free(dd->private);
+ free(dd);
+ dd = d;
+ }
+ return err;
+}
+
static int read_msg(char *buf)
{
int length, len, msglen, tap_fd, *io_fd;
msg_newdev_t *msg_dev;
msg_pid_t *msg_pid;
struct tap_disk *drv;
+ struct disk_driver *dd;
int ret = -1;
struct td_state *s = NULL;
fd_list_entry_t *entry;
if (s == NULL)
goto params_done;
- s->drv = drv;
- s->private = malloc(drv->private_data_size);
- if (s->private == NULL) {
+ s->disks = dd = disk_init(s, drv);
+ if (!dd) {
free(s);
goto params_done;
}
/*Open file*/
- ret = drv->td_open(s, path);
- io_fd = drv->td_get_fd(s);
+ ret = open_disk(s, dd, path);
+ if (ret)
+ goto params_done;
- entry = add_fd_entry(0, io_fd, s);
+ entry = add_fd_entry(0, s);
entry->cookie = msg->cookie;
- DPRINTF("Entered cookie %d\n",entry->cookie);
+ DPRINTF("Entered cookie %d\n", entry->cookie);
memset(buf, 0x00, MSG_SIZE);
free(path);
return 1;
-
-
case CTLMSG_NEWDEV:
msg_dev = (msg_newdev_t *)(buf + sizeof(msg_hdr_t));
s = get_state(msg->cookie);
- DPRINTF("Retrieving state, cookie %d.....[%s]\n",msg->cookie, (s == NULL ? "FAIL":"OK"));
+ DPRINTF("Retrieving state, cookie %d.....[%s]\n",
+ msg->cookie, (s == NULL ? "FAIL":"OK"));
if (s != NULL) {
ret = ((map_new_dev(s, msg_dev->devnum)
== msg_dev->devnum ? 0: -1));
}
}
-void io_done(struct td_state *s, int sid)
+void io_done(struct disk_driver *dd, int sid)
{
- struct tap_disk *drv = s->drv;
+ struct tap_disk *drv = dd->drv;
if (!run) return; /*We have received signal to close*/
- if (drv->td_do_callbacks(s, sid) > 0) kick_responses(s);
+ if (drv->td_do_callbacks(dd, sid) > 0) kick_responses(dd->td_state);
return;
}
-int send_responses(struct td_state *s, int res, int idx, void *private)
+static inline uint64_t
+segment_start(blkif_request_t *req, int sidx)
+{
+ int i;
+ uint64_t start = req->sector_number;
+
+ for (i = 0; i < sidx; i++)
+ start += (req->seg[i].last_sect - req->seg[i].first_sect + 1);
+
+ return start;
+}
+
+uint64_t sends, responds;
+int send_responses(struct disk_driver *dd, int res,
+ uint64_t sector, int nr_secs, int idx, void *private)
{
+ pending_req_t *preq;
blkif_request_t *req;
int responses_queued = 0;
+ struct td_state *s = dd->td_state;
blkif_t *blkif = s->blkif;
+	int sidx = (int)(long)private, secs_done = nr_secs;
- req = &blkif->pending_list[idx].req;
-
- if ( (idx > MAX_REQUESTS-1) ||
- (blkif->pending_list[idx].count == 0) )
+	if (idx > MAX_REQUESTS-1)
{
DPRINTF("invalid index returned(%u)!\n", idx);
return 0;
}
-
- if (res != 0) {
- blkif->pending_list[idx].status = BLKIF_RSP_ERROR;
+ preq = &blkif->pending_list[idx];
+ req = &preq->req;
+
+ if (res == BLK_NOT_ALLOCATED) {
+ res = do_cow_read(dd, req, sidx, sector, nr_secs);
+ if (res >= 0) {
+ secs_done = res;
+ res = 0;
+ } else
+ secs_done = 0;
}
- blkif->pending_list[idx].count--;
+ preq->secs_pending -= secs_done;
+
+ if (res == -EBUSY && preq->submitting)
+ return -EBUSY; /* propagate -EBUSY back to higher layers */
+ if (res)
+ preq->status = BLKIF_RSP_ERROR;
- if (blkif->pending_list[idx].count == 0)
+ if (!preq->submitting && preq->secs_pending == 0)
{
blkif_request_t tmp;
blkif_response_t *rsp;
-
- tmp = blkif->pending_list[idx].req;
+
+ tmp = preq->req;
rsp = (blkif_response_t *)req;
rsp->id = tmp.id;
rsp->operation = tmp.operation;
- rsp->status = blkif->pending_list[idx].status;
+ rsp->status = preq->status;
write_rsp_to_ring(s, rsp);
responses_queued++;
return responses_queued;
}
+int do_cow_read(struct disk_driver *dd, blkif_request_t *req,
+ int sidx, uint64_t sector, int nr_secs)
+{
+ char *page;
+ int ret, early;
+ uint64_t seg_start, seg_end;
+ struct td_state *s = dd->td_state;
+ tapdev_info_t *info = s->ring_info;
+ struct disk_driver *parent = dd->next;
+
+ seg_start = segment_start(req, sidx);
+ seg_end = seg_start + req->seg[sidx].last_sect + 1;
+
+ ASSERT(sector >= seg_start && sector + nr_secs <= seg_end);
+
+ page = (char *)MMAP_VADDR(info->vstart,
+ (unsigned long)req->id, sidx);
+ page += (req->seg[sidx].first_sect << SECTOR_SHIFT);
+ page += ((sector - seg_start) << SECTOR_SHIFT);
+
+ if (!parent) {
+ memset(page, 0, nr_secs << SECTOR_SHIFT);
+ return nr_secs;
+ }
+
+ /* reissue request to backing file */
+ ret = parent->drv->td_queue_read(parent, sector, nr_secs,
+ page, send_responses,
+					req->id, (void *)(long)sidx);
+ if (ret > 0)
+ parent->early += ret;
+
+ return ((ret >= 0) ? 0 : ret);
+}
+
static void get_io_request(struct td_state *s)
{
- RING_IDX rp, rc, j, i, ret;
+ RING_IDX rp, rc, j, i;
blkif_request_t *req;
- int idx, nsects;
+ int idx, nsects, ret;
uint64_t sector_nr;
char *page;
int early = 0; /* count early completions */
- struct tap_disk *drv = s->drv;
+ struct disk_driver *dd = s->disks;
+ struct tap_disk *drv = dd->drv;
blkif_t *blkif = s->blkif;
tapdev_info_t *info = s->ring_info;
int page_size = getpagesize();
rmb();
for (j = info->fe_ring.req_cons; j != rp; j++)
{
- int done = 0;
+ int done = 0, start_seg = 0;
req = NULL;
req = RING_GET_REQUEST(&info->fe_ring, j);
++info->fe_ring.req_cons;
if (req == NULL) continue;
-
+
idx = req->id;
- ASSERT(blkif->pending_list[idx].count == 0);
- memcpy(&blkif->pending_list[idx].req, req, sizeof(*req));
- blkif->pending_list[idx].status = BLKIF_RSP_OKAY;
- blkif->pending_list[idx].count = req->nr_segments;
- sector_nr = req->sector_number;
+ if (info->busy.req) {
+ /* continue where we left off last time */
+ ASSERT(info->busy.req == req);
+ start_seg = info->busy.seg_idx;
+ sector_nr = segment_start(req, start_seg);
+ info->busy.seg_idx = 0;
+ info->busy.req = NULL;
+ } else {
+ ASSERT(blkif->pending_list[idx].secs_pending == 0);
+ memcpy(&blkif->pending_list[idx].req,
+ req, sizeof(*req));
+ blkif->pending_list[idx].status = BLKIF_RSP_OKAY;
+ blkif->pending_list[idx].submitting = 1;
+ sector_nr = req->sector_number;
+ }
- for (i = 0; i < req->nr_segments; i++) {
+ for (i = start_seg; i < req->nr_segments; i++) {
nsects = req->seg[i].last_sect -
req->seg[i].first_sect + 1;
(long long unsigned) sector_nr);
continue;
}
-
+
+ blkif->pending_list[idx].secs_pending += nsects;
+
switch (req->operation)
{
case BLKIF_OP_WRITE:
- ret = drv->td_queue_write(s, sector_nr,
- nsects, page, send_responses,
- idx, NULL);
- if (ret > 0) early += ret;
+ ret = drv->td_queue_write(dd, sector_nr,
+ nsects, page,
+ send_responses,
+						idx, (void *)(long)i);
+ if (ret > 0) dd->early += ret;
else if (ret == -EBUSY) {
- /*
- * TODO: Sector is locked *
- * Need to put req back on queue *
- */
+ /* put req back on queue */
+ --info->fe_ring.req_cons;
+ info->busy.req = req;
+ info->busy.seg_idx = i;
+ goto out;
}
break;
case BLKIF_OP_READ:
- ret = drv->td_queue_read(s, sector_nr,
- nsects, page, send_responses,
- idx, NULL);
- if (ret > 0) early += ret;
+ ret = drv->td_queue_read(dd, sector_nr,
+ nsects, page,
+ send_responses,
+						idx, (void *)(long)i);
+ if (ret > 0) dd->early += ret;
else if (ret == -EBUSY) {
- /*
- * TODO: Sector is locked *
- * Need to put req back on queue *
- */
+ /* put req back on queue */
+ --info->fe_ring.req_cons;
+ info->busy.req = req;
+ info->busy.seg_idx = i;
+ goto out;
}
break;
default:
}
sector_nr += nsects;
}
+ blkif->pending_list[idx].submitting = 0;
+ /* force write_rsp_to_ring for synchronous case */
+ if (blkif->pending_list[idx].secs_pending == 0)
+ dd->early += send_responses(dd, 0, 0, 0, idx, (void *)0);
}
+ out:
/*Batch done*/
- drv->td_submit(s);
-
- if (early > 0)
- io_done(s,10);
-
+ td_for_each_disk(s, dd) {
+ dd->early += dd->drv->td_submit(dd);
+ if (dd->early > 0) {
+ io_done(dd, 10);
+ dd->early = 0;
+ }
+ }
+
return;
}
char *p, *buf;
fd_set readfds, writefds;
fd_list_entry_t *ptr;
- struct tap_disk *drv;
struct td_state *s;
char openlogbuf[128];
-
+
if (argc != 3) usage();
daemonize();
signal (SIGINT, sig_handler);
/*Open the control channel*/
- fds[READ] = open(argv[1],O_RDWR|O_NONBLOCK);
+ fds[READ] = open(argv[1],O_RDWR|O_NONBLOCK);
fds[WRITE] = open(argv[2],O_RDWR|O_NONBLOCK);
if ( (fds[READ] < 0) || (fds[WRITE] < 0) )
{
- DPRINTF("FD open failed [%d,%d]\n",fds[READ], fds[WRITE]);
+ DPRINTF("FD open failed [%d,%d]\n", fds[READ], fds[WRITE]);
exit(-1);
}
{
ptr = fd_start;
while (ptr != NULL) {
- if (FD_ISSET(ptr->tap_fd, &readfds))
+ int progress_made = 0;
+ struct disk_driver *dd;
+ tapdev_info_t *info = ptr->s->ring_info;
+
+ td_for_each_disk(ptr->s, dd) {
+ if (dd->io_fd[READ] &&
+ FD_ISSET(dd->io_fd[READ],
+ &readfds)) {
+ io_done(dd, READ);
+ progress_made = 1;
+ }
+ }
+
+ if (FD_ISSET(ptr->tap_fd, &readfds) ||
+ (info->busy.req && progress_made))
get_io_request(ptr->s);
- if (ptr->io_fd[READ] &&
- FD_ISSET(ptr->io_fd[READ], &readfds))
- io_done(ptr->s, READ);
ptr = ptr->next;
}
ptr = fd_start;
while (ptr != NULL) {
s = ptr->s;
- drv = s->drv;
unmap_disk(s);
- drv->td_close(s);
- free(s->private);
free(s->blkif);
free(s->ring_info);
free(s);
* - The fd used for poll is an otherwise unused pipe, which allows poll to
* be safely called without ever returning anything.
*
+ * NOTE: tapdisk uses the number of sectors submitted per request as a
+ * ref count. Plugins must use the callback function to communicate the
+ * completion--or error--of every sector submitted to them.
*/
#ifndef TAPDISK_H_
#define SECTOR_SHIFT 9
#define DEFAULT_SECTOR_SIZE 512
+#define MAX_IOFD 2
+
+#define BLK_NOT_ALLOCATED 99
+
+struct td_state;
+struct tap_disk;
+
+struct disk_driver {
+ int early;
+ void *private;
+ int io_fd[MAX_IOFD];
+ struct tap_disk *drv;
+ struct td_state *td_state;
+ struct disk_driver *next;
+};
+
/* This structure represents the state of an active virtual disk. */
struct td_state {
- void *private;
- void *drv;
+ struct disk_driver *disks;
void *blkif;
void *image;
void *ring_info;
void *fd_entry;
- char backing_file[1024]; /*Used by differencing disks, e.g. qcow*/
unsigned long sector_size;
unsigned long long size;
unsigned int info;
};
/* Prototype of the callback to activate as requests complete. */
-typedef int (*td_callback_t)(struct td_state *s, int res, int id, void *prv);
+typedef int (*td_callback_t)(struct disk_driver *dd, int res, uint64_t sector,
+ int nb_sectors, int id, void *private);
/* Structure describing the interface to a virtual disk implementation. */
/* See note at the top of this file describing this interface. */
struct tap_disk {
const char *disk_type;
int private_data_size;
- int (*td_open) (struct td_state *s, const char *name);
- int (*td_queue_read) (struct td_state *s, uint64_t sector,
- int nb_sectors, char *buf, td_callback_t cb,
+ int (*td_open) (struct disk_driver *dd, const char *name);
+ int (*td_queue_read) (struct disk_driver *dd, uint64_t sector,
+ int nb_sectors, char *buf, td_callback_t cb,
int id, void *prv);
- int (*td_queue_write) (struct td_state *s, uint64_t sector,
- int nb_sectors, char *buf, td_callback_t cb,
+ int (*td_queue_write) (struct disk_driver *dd, uint64_t sector,
+ int nb_sectors, char *buf, td_callback_t cb,
int id, void *prv);
- int (*td_submit) (struct td_state *s);
- int *(*td_get_fd) (struct td_state *s);
- int (*td_close) (struct td_state *s);
- int (*td_do_callbacks)(struct td_state *s, int sid);
+ int (*td_submit) (struct disk_driver *dd);
+ int (*td_has_parent) (struct disk_driver *dd);
+ int (*td_get_parent) (struct disk_driver *dd, struct disk_driver *p);
+ int (*td_close) (struct disk_driver *dd);
+ int (*td_do_callbacks)(struct disk_driver *dd, int sid);
};
typedef struct disk_info {
extern struct tap_disk tapdisk_ram;
extern struct tap_disk tapdisk_qcow;
-#define MAX_DISK_TYPES 20
-#define MAX_IOFD 2
+#define MAX_DISK_TYPES 20
-#define DISK_TYPE_AIO 0
-#define DISK_TYPE_SYNC 1
-#define DISK_TYPE_VMDK 2
-#define DISK_TYPE_RAM 3
-#define DISK_TYPE_QCOW 4
+#define DISK_TYPE_AIO 0
+#define DISK_TYPE_SYNC 1
+#define DISK_TYPE_VMDK 2
+#define DISK_TYPE_RAM 3
+#define DISK_TYPE_QCOW 4
/*Define Individual Disk Parameters here */
typedef struct fd_list_entry {
int cookie;
int tap_fd;
- int io_fd[MAX_IOFD];
struct td_state *s;
struct fd_list_entry **pprev, *next;
} fd_list_entry_t;
int qcow_create(const char *filename, uint64_t total_size,
const char *backing_file, int flags);
-
#endif /*TAPDISK_H_*/
typedef struct {
blkif_request_t req;
- struct blkif *blkif;
- int count;
+ struct blkif *blkif;
+ int submitting;
+ int secs_pending;
int16_t status;
} pending_req_t;
void *prv; /* device-specific data */
void *info; /*Image parameter passing */
- pending_req_t pending_list[MAX_REQUESTS];
+ pending_req_t pending_list[MAX_REQUESTS];
int devnum;
int fds[2];
int be_id;
void free_blkif(blkif_t *blkif);
void __init_blkif(void);
+typedef struct busy_state {
+ int seg_idx;
+ blkif_request_t *req;
+} busy_state_t;
+
typedef struct tapdev_info {
int fd;
char *mem;
blkif_back_ring_t fe_ring;
unsigned long vstart;
blkif_t *blkif;
+ busy_state_t busy;
} tapdev_info_t;
typedef struct domid_translate {
}
if (!xs_unwatch(h, watch->node, token))
- DPRINTF("XENBUS Failed to release watch %s: %i\n",
- watch->node, er);
+ DPRINTF("XENBUS Failed to release watch %s\n",
+ watch->node);
list_del(&watch->list);
node = res[XS_WATCH_PATH];
token = res[XS_WATCH_TOKEN];
-
+
w = find_watch(token);
- if (w)
+ if (w)
w->callback(h, w, node);
free(res);